{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import time\n",
    "import pymapd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Connection(mapd://mapd:***@35.223.201.190:6274/mapd?protocol=binary)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Query loading time for localhost Omnisci server\n",
    "con = pymapd.connect(user=\"mapd\", password=\"HyperInteractive\", host=\"localhost\", dbname=\"mapd\")\n",
    "con\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['flight_year', 'flight_month', 'flight_dayofmonth', 'flight_dayofweek',\n",
      "       'deptime', 'crsdeptime', 'arrtime', 'crsarrtime', 'uniquecarrier',\n",
      "       'flightnum', 'tailnum', 'actualelapsedtime', 'crselapsedtime',\n",
      "       'airtime', 'arrdelay', 'depdelay', 'origin', 'dest', 'distance',\n",
      "       'taxiin', 'taxiout', 'cancelled', 'cancellationcode', 'diverted',\n",
      "       'carrierdelay', 'weatherdelay', 'nasdelay', 'securitydelay',\n",
      "       'lateaircraftdelay', 'dep_timestamp', 'arr_timestamp', 'carrier_name',\n",
      "       'plane_type', 'plane_manufacturer', 'plane_issue_date', 'plane_model',\n",
      "       'plane_status', 'plane_aircraft_type', 'plane_engine_type',\n",
      "       'plane_year', 'origin_name', 'origin_city', 'origin_state',\n",
      "       'origin_country', 'origin_lat', 'origin_lon', 'dest_name', 'dest_city',\n",
      "       'dest_state', 'dest_country', 'dest_lat', 'dest_lon', 'origin_merc_x',\n",
      "       'origin_merc_y', 'dest_merc_x', 'dest_merc_y'],\n",
      "      dtype='object')\n",
      "   flight_year  flight_month  flight_dayofmonth  flight_dayofweek  deptime  \\\n",
      "0         2008             1                  3                 4     2003   \n",
      "\n",
      "   crsdeptime  arrtime  crsarrtime uniquecarrier  flightnum  ...  \\\n",
      "0        1955     2211        2225            WN        335  ...   \n",
      "\n",
      "             dest_name  dest_city  dest_state  dest_country   dest_lat  \\\n",
      "0  Tampa International      Tampa          FL           USA  27.975471   \n",
      "\n",
      "    dest_lon origin_merc_x origin_merc_y  dest_merc_x  dest_merc_y  \n",
      "0 -82.533249    -8622341.0     4713729.5   -9187559.0   3245881.75  \n",
      "\n",
      "[1 rows x 56 columns]\n"
     ]
    }
   ],
   "source": [
    "query = 'SELECT * FROM flights_2008_10k LIMIT 1'\n",
    "df = pd.read_sql(query, con)\n",
    "print(df.columns)\n",
    "print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "time taken for query 0.215620756149292 seconds\n",
      "time taken for pd read query 0.2323322296142578 seconds\n"
     ]
    }
   ],
   "source": [
    "query = 'SELECT depdelay, arrdelay, airtime, carrier_name FROM flights_2008_10k LIMIT 5000'\n",
    "\n",
    "start = time.time()\n",
    "con.execute(query)\n",
    "end = time.time()\n",
    "print('time taken for query', end-start, 'seconds')\n",
    "\n",
    "\n",
    "start = time.time()\n",
    "df = pd.read_sql(query, con)\n",
    "end = time.time()\n",
    "print('time taken for pd read query', end-start, 'seconds')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    AVG_arrdelay    AVG_lat     AVG_lon dest_state\n",
      "0       2.241771  27.938562  -81.474062         FL\n",
      "1       8.002008  39.175589  -76.668000         MD\n",
      "2      24.322540  36.603217 -115.857502         NV\n",
      "3       8.744578  39.023410  -92.542245         MO\n",
      "4      10.322630  41.785709  -87.752872         IL\n",
      "5      20.397218  33.304324 -111.902758         AZ\n",
      "6       8.620501  30.962280  -97.742533         TX\n",
      "7       2.182927  33.562967  -86.753525         AL\n",
      "8       8.639216  36.124451  -86.677910         TN\n",
      "9      25.980000  39.717314  -86.294414         IN\n",
      "10     14.647887  37.805695  -76.758816         VA\n",
      "11      6.912500  40.042856  -76.617340         PA\n",
      "12     17.969231  35.040174 -106.609325         NM\n",
      "13     14.189655  41.859535  -74.851838         NY\n",
      "14     17.759259  41.938863  -72.683168         CT\n",
      "15     21.500000  43.564449 -116.222698         ID\n",
      "16     33.374187  35.619374 -119.822581         CA\n",
      "17      9.656716  40.535783  -82.495161         OH\n",
      "18     18.209581  39.858401 -104.667057         CO\n",
      "19     21.906593  47.494797 -121.030524         WA\n",
      "20     14.974359  34.729383  -92.224196         AR\n",
      "21      9.232558  42.934553  -71.437023         NH\n",
      "22      3.883495  29.993403  -90.257917         LA\n",
      "23      8.129310  35.767978  -96.803534         OK\n",
      "24     27.695652  41.302498  -95.894213         NE\n",
      "25     28.239316  45.588729 -122.597599         OR\n",
      "26     12.635417  41.724056  -71.428223         RI\n",
      "27      8.966667  35.877667  -78.787560         NC\n",
      "28      9.068966  38.174405  -85.736000         KY\n",
      "29     27.790210  40.788415 -111.977600         UT\n",
      "30     10.535714  42.212061  -83.348826         MI\n",
      "31     13.846154  32.311160  -90.075900         MS\n"
     ]
    }
   ],
   "source": [
    "cholo_query = 'SELECT AVG(arrdelay) AS AVG_arrdelay, AVG(dest_lat) AS AVG_lat, AVG(dest_lon) AS AVG_lon, dest_state FROM flights_2008_10k GROUP BY dest_state'\n",
    "cholo_df = pd.read_sql(cholo_query, con)\n",
    "\n",
    "print(cholo_df)\n",
    "\n",
    "zmin=min(cholo_df['AVG_arrdelay'])\n",
    "zmax=max(cholo_df['AVG_arrdelay'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>avg_delay</th>\n",
       "      <th>origin_city</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.357143</td>\n",
       "      <td>Islip</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   avg_delay origin_city\n",
       "0   3.357143       Islip"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Query for city data\n",
    "\n",
    "table=\"flights_2008_10k\"\n",
    "count_query = \"SELECT AVG(arrdelay) AS avg_delay, origin_city FROM {0} WHERE origin_state='NY' AND flight_dayofweek = 4 group by origin_city\".format(\n",
    "            table\n",
    "        )\n",
    "\n",
    "count_df = pd.read_sql(count_query, con)\n",
    "\n",
    "count_df\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         dep_timestamp  deptime\n",
      "0  2008-01-03 20:03:00     2003\n",
      "1  2008-01-03 07:54:00      754\n",
      "2  2008-01-03 06:28:00      628\n",
      "3  2008-01-03 09:26:00      926\n",
      "4  2008-01-03 18:29:00     1829\n",
      "..                 ...      ...\n",
      "95 2008-01-03 15:06:00     1506\n",
      "96 2008-01-03 20:39:00     2039\n",
      "97 2008-01-03 09:24:00      924\n",
      "98 2008-01-03 16:11:00     1611\n",
      "99 2008-01-03 18:24:00     1824\n",
      "\n",
      "[100 rows x 2 columns]\n",
      "{68: 4, 55: 10, 54: 9, 57: 7, 66: 7, 67: 7, 58: 6, 64: 6, 62: 8, 65: 5, 61: 6, 56: 7, 63: 6, 69: 3, 59: 4, 70: 2, 60: 3}\n"
     ]
    }
   ],
   "source": [
    "# Time series\n",
    "import datetime\n",
    "\n",
    "# x: binned datetime, y: count, aggregate(count) in plotly\n",
    "\n",
    "ts_query_x = \"SELECT dep_timestamp, deptime from flights_2008_10k LIMIT 100\"\n",
    "min_ts_query_x = \"SELECT dep_timestamp, deptime from flights_2008_10k ORDER BY dep_timestamp ASC LIMIT 1\"\n",
    "\n",
    "df=pd.read_sql(ts_query_x, con)\n",
    "\n",
    "print(df)\n",
    "\n",
    "\n",
    "start_time = datetime.datetime(2008, 1, 1)\n",
    "\n",
    "\n",
    "binned_x = []\n",
    "count = {}\n",
    "for x_val in df['dep_timestamp']:\n",
    "    elapsed_seconds = (x_val - start_time).total_seconds()\n",
    "    hour = int(elapsed_seconds/3600)\n",
    "    \n",
    "    \n",
    "    if hour not in count.keys():\n",
    "        count[hour]=1\n",
    "    else:\n",
    "        count[hour]+=1\n",
    "        \n",
    "\n",
    "print(count)\n",
    "\n",
    "x = []\n",
    "y=[]\n",
    "for key in count.keys():\n",
    "    x_timestamp = start_time + datetime.timedelta(seconds=3600*key)\n",
    "    x.append(x_timestamp)\n",
    "    y.append(count[key])\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
